#!/usr/bin/python27
#coding=utf8
import urllib2,sys
from bs4 import BeautifulSoup
import re
import uuid
from cvm.common.RunSql import RunSQl

# Python 2 only: re-import sys so that sys.setdefaultencoding is reachable
# again (the attribute is not available on the sys module after interpreter
# start-up).
reload(sys)

# Make implicit str <-> unicode conversions use UTF-8 instead of ASCII; the
# str(...) calls on scraped text further down rely on this.
sys.setdefaultencoding('utf8')

# Free-standing description string (not the module docstring, because it is
# not the first statement in the file). English translation:
#   "Scrape price quotes from Autohome (URL below) and insert them into the
#    database."
"""
从汽车之家 http://car.autohome.com.cn/AsLeftMenu/As_LeftListNew.ashx?typeId=1%20&brandId=117%20&fctId=0%20&seriesId=0
爬取报价 ,插入数据库
"""

def get_uuid():
    """Return a newly generated version-1 UUID in canonical string form."""
    return str(uuid.uuid1())

def get_html(url):
    """Download *url* and return the raw response body.

    :param url: address passed unchanged to ``urllib2.urlopen``.
    :returns: the bytes read from the response.
    :raises urllib2.URLError: propagated from ``urlopen`` when the request
        fails (no retry or timeout handling is done here).
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        # Python 2 urllib2 responses are not context managers, so close the
        # connection explicitly instead of waiting for garbage collection.
        response.close()

def get_left_cat_list_tree():
    """Fetch the left-hand menu tree and pass its HTML to get_every_car_href."""
    # URL of the left-hand menu tree.
    menu_url = 'http://car.autohome.com.cn/AsLeftMenu/As_LeftListNew.ashx?typeId=1%20&brandId=117%20&fctId=0%20&seriesId=0'
    get_every_car_href(get_html(menu_url))


def get_every_car_href(data):
    """Extract the top-level car categories and their links from the menu HTML.

    Every ``<li>`` in *data* that contains an ``<a href=...>`` produces one
    ``(uuid, name, absolute_url)`` tuple. The name is what remains of the
    serialised anchor after stripping everything up to the closing ``</i>``
    and everything from ``<em>`` onwards. The collected list is then handed
    to ``get_info_by_dalei``.

    :param data: HTML markup of the left-hand menu.
    :returns: None; the result is passed on to ``get_info_by_dalei``.
    """
    url_title = 'http://car.autohome.com.cn'
    soup = BeautifulSoup(data, 'html.parser')
    list_dalei = []
    for item in soup.find_all('li'):
        link = item.a
        href = link.get('href') if link is not None else None
        if href is None:
            # An <li> without a usable link cannot be followed; skip it
            # instead of aborting the whole run.
            continue
        # Work on the already-parsed tag; serialising and re-parsing each
        # <li> yields the same anchor at extra cost.
        name = re.sub('^<a.*</i>', '', str(link))
        name = re.sub('<em>.*</a>', '', name)
        list_dalei.append((get_uuid(), name, url_title + str(href)))
    get_info_by_dalei(list_dalei)

def get_info_by_dalei(list_dalei):
    car_map = {}
    type_0_list = []
    type_1_list = []
    car_map[0] = type_0_list
    car_map[1] = type_1_list
    for info_i in list_dalei:                                    ### 大类
        print ',',
        # print info_i
        type_0_list.append(info_i)
        data = get_html(info_i[2])
        soup = BeautifulSoup(data, 'html.parser')
        er_lei = soup.find_all('div', 'tab-content fn-visible')
        try:
            soup_1 = BeautifulSoup(str(er_lei[0]), 'html.parser')
            every_car = soup_1.find_all('div', 'list-cont')
            # print len(every_car)
            for every_car_i in every_car:
                soup_2 = BeautifulSoup(str(every_car[0]), 'html.parser')
                try:
                    img_src = str(soup_2.div.div.a.img['src'])
                except Exception,e:
                    img_src = ''
                # print img_src

                soup_3 = BeautifulSoup(str(every_car_i), 'html.parser')
                xxxxx = soup_3.find_all('div', 'list-cont-main')
                # print xxxxx[0]
                try:
                    car_name = str(xxxxx[0].div.a.string)
                except Exception,e:
                    car_name = ''
                # print car_name

                soup_4 = BeautifulSoup(str(xxxxx[0]), 'html.parser')
                # print soup_4
                soup_5 = BeautifulSoup(str(soup_4.div.ul), 'html.parser')
                all_li = soup_5.find_all('li')
                for j,all_li_i in enumerate(all_li):
                    soup_6 = BeautifulSoup(str(all_li_i).decode('utf-8'), 'html.parser')
                    # print soup_6
                    if j == 0:
                        try:
                            type_car = str(soup_6.li.span).replace('<span class="info-gray">', '').replace('</span>', '')
                        except Exception,e:
                            type_car = ''
                        # print type_car
                    # print all_li_i
                    elif j == 1:
                        try:
                            structure = str(soup_6.li.a.string)
                        except Exception,e:
                            structure = ''
                        # print structure
                    elif j == 2:
                        try:
                            engine = str(soup_6.li.span.a.string)
                        except Exception,e:
                            engine = ''
                        # print engine
                    elif j == 3:
                        try:
                            gearbox = str(soup_6.li.a.string)
                        except Exception,e:
                            gearbox = ''
                        # print gearbox

                ccccc =  str(soup_4.find_all('div', 'main-lever-right')[0])
                soup_7 = BeautifulSoup(ccccc, 'html.parser')
                try:
                    price = str(soup_7.div.span.string)
                except Exception,e:
                    price = ''
                # print price
                peifen = str(soup_7.find_all('div', 'score-cont')[0])
                soup_8 = BeautifulSoup(peifen, 'html.parser')
                try:
                    mark = str(soup_8.span.string)
                except Exception,e:
                    mark = ''
                # print mark,type(mark)
                if mark == 'None':
                    pf = soup_8.find_all('span', 'score-number')
                    soup_9 = BeautifulSoup(str(pf), 'html.parser')
                    mark = str(soup_9.span.string)
                    # print str(mark)
                type_1_list.append((get_uuid(),info_i[0],car_name,type_car,engine,structure,gearbox,img_src,price,mark))
            # break
            # sys.exit(0)
        except Exception,e:
            pass
    # print type_1_list
    insert_into_database(car_map)

def _sql_quote(value):
    """Escape *value* for use inside a single-quoted SQL string literal.

    Single quotes are doubled. Backslashes are doubled as well, which
    assumes the target database treats backslash as an escape character
    inside string literals (MySQL default) -- TODO confirm against the
    database RunSQl writes to.
    """
    return str(value).replace('\\', '\\\\').replace("'", "''")


def insert_into_database(car_map):
    """Build the insert statements for all collected rows and hand them to RunSQl.

    :param car_map: dict whose key 0 holds category tuples (id, name, ...)
        and whose key 1 holds detail tuples of at least ten items (id,
        parent id, car_name, type_car, engine, structure, gearbox, img_src,
        price, mark).
    :returns: None; the statement list is passed to
        ``RunSQl.create_sqllist_file``.
    """
    runSql = RunSQl()
    sql_list = []
    # Scraped text is untrusted: escape every value before it is embedded
    # in a statement so that a quote in a name cannot break or alter the SQL.
    for category in car_map[0]:
        sql = """insert into cvm_car_price (id,up_id,car_type_code,car_name,create_time)
            VALUES ('%s','0','0','%s',now())
            """ % (_sql_quote(category[0]), _sql_quote(category[1]))
        sql_list.append(sql)
    for car in car_map[1]:
        sql = """
            insert into cvm_car_price (id,up_id,car_type_code,car_name,type_car,engine,structure,gearbox,img_src,price,mark,create_time)
            VALUES ('%s','%s','1','%s','%s','%s','%s','%s','%s','%s','%s',now())
        """ % tuple(_sql_quote(field) for field in car[:10])
        sql_list.append(sql)
    runSql.create_sqllist_file(sql_list)                        # hand the statements to RunSQl

# Script entry point: when executed directly, start the scrape with the
# left-hand menu; each stage calls the next one, ending in
# insert_into_database.
if __name__ == '__main__':
    get_left_cat_list_tree()